<?php

require_once 'HTMLPurifier/EntityLookup.php';
require_once 'HTMLPurifier/Encoder.php';

// If we want to implement error collecting here, we'll need to use some sort
// of global data (probably trigger_error), because it's impossible to pass
// $config or $context to the callback functions.

/**
 * Handles referencing and dereferencing character entities.
 *
 * Entities are handled in two passes: substituteNonSpecialEntities()
 * decodes everything except the special characters listed in
 * $_special_dec2str, and substituteSpecialEntities() decodes only those
 * special characters.
 */
class HTMLPurifier_EntityParser
{
	
	/**
	 * Reference to entity lookup table. Populated lazily from
	 * HTMLPurifier_EntityLookup::instance() the first time a named,
	 * non-special entity has to be resolved.
	 * @protected
	 */
	var $_entity_lookup;
	
	/**
	 * Callback regex string for parsing entities. Matches an ampersand
	 * followed by a hexadecimal reference, a decimal reference (leading
	 * zeros are not captured) or an entity name; the trailing semicolon
	 * is optional.
	 * @protected
	 */
	var $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
//	 1. hex			 2. dec	  3. string (XML style)
	
	
	/**
	 * Decimal to parsed string conversion table for special entities.
	 * @protected
	 */
	var $_special_dec2str =
			array(
					34 => '"',
					38 => '&',
					39 => "'",
					60 => '<',
					62 => '>'
			);
	
	/**
	 * Stripped entity names to decimal conversion table for special entities.
	 * Every value in this table is also a key of $_special_dec2str.
	 * @protected
	 */
	var $_special_ent2dec =
			array(
					'quot' => 34,
					'amp'  => 38,
					'lt'   => 60,
					'gt'   => 62
			);
	
	/**
	 * Substitutes non-special entities with their parsed equivalents. Since
	 * running this whenever you have parsed character is t3h 5uck, we run
	 * it before everything else.
	 * 
	 * @protected
	 * @param $string String to have non-special entities parsed.
	 * @returns Parsed string.
	 */
	function substituteNonSpecialEntities($string) {
		// it will try to detect missing semicolons, but don't rely on it
		return preg_replace_callback(
			$this->_substituteEntitiesRegex,
			array($this, 'nonSpecialEntityCallback'),
			$string
			);
	}
	
	/**
	 * Callback function for substituteNonSpecialEntities() that does the work.
	 * 
	 * Special entities (numeric or named) are returned untouched. Other
	 * numeric references are converted with HTMLPurifier_Encoder::unichr(),
	 * other named entities are resolved through the entity lookup table,
	 * and names missing from the table are returned untouched.
	 * 
	 * @warning Though this is public in order to let the callback happen,
	 *		  calling it directly is not recommended.
	 * @param $matches  PCRE matches array, with 0 the entire match, and
	 *				  either index 1, 2 or 3 set with a hex value, dec value,
	 *				  or string (respectively).
	 * @returns Replacement string.
	 */
	function nonSpecialEntityCallback($matches) {
		$entity = $matches[0];
		// explicit offset checks instead of error suppression
		$is_num = (isset($entity[1]) && $entity[1] === '#');
		if ($is_num) {
			$is_hex = (isset($entity[2]) && $entity[2] === 'x');
			$code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
			
			// abort for special characters
			if (isset($this->_special_dec2str[$code])) {
				return $entity;
			}
			
			return HTMLPurifier_Encoder::unichr($code);
		}
		
		$name = $matches[3];
		// special named entities are left for substituteSpecialEntities()
		if (isset($this->_special_ent2dec[$name])) {
			return $entity;
		}
		if (!$this->_entity_lookup) {
			$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
		}
		if (isset($this->_entity_lookup->table[$name])) {
			return $this->_entity_lookup->table[$name];
		}
		// unknown entity name: leave the text as it was
		return $entity;
	}
	
	/**
	 * Substitutes only special entities with their parsed equivalents.
	 * 
	 * @notice We try to avoid calling this function because otherwise, it
	 * would have to be called a lot (for every parsed section).
	 * 
	 * @protected
	 * @param $string String to have special entities parsed.
	 * @returns Parsed string.
	 */
	function substituteSpecialEntities($string) {
		return preg_replace_callback(
			$this->_substituteEntitiesRegex,
			array($this, 'specialEntityCallback'),
			$string);
	}
	
	/**
	 * Callback function for substituteSpecialEntities() that does the work.
	 * 
	 * This callback has same syntax as nonSpecialEntityCallback().
	 * 
	 * @warning Though this is public in order to let the callback happen,
	 *		  calling it directly is not recommended.
	 * @param $matches  PCRE-style matches array, with 0 the entire match, and
	 *				  either index 1, 2 or 3 set with a hex value, dec value,
	 *				  or string (respectively).
	 * @returns Replacement string: the literal character for a special
	 *		  entity, otherwise the matched text unchanged.
	 */
	function specialEntityCallback($matches) {
		$entity = $matches[0];
		// explicit offset checks instead of error suppression
		$is_num = (isset($entity[1]) && $entity[1] === '#');
		if ($is_num) {
			$is_hex = (isset($entity[2]) && $entity[2] === 'x');
			$int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
			return isset($this->_special_dec2str[$int]) ?
				$this->_special_dec2str[$int] :
				$entity;
		}
		
		$name = $matches[3];
		if (isset($this->_special_ent2dec[$name])) {
			// _special_ent2dec only yields the code point; map it through
			// _special_dec2str so the character itself is returned
			// (e.g. "&amp;" becomes "&", not "38")
			return $this->_special_dec2str[$this->_special_ent2dec[$name]];
		}
		return $entity;
	}
	
}

